from sklearn.feature_extraction.text import CountVectorizer

# Demo: bag-of-words counts for pre-tokenized (space-separated) Chinese text.
# Corpus: "I love you" / "I hate you hate you" — tokens already split by spaces.
corpus = ['我 爱 你', '我 恨 你 恨 你']

# Match runs of ASCII letters or CJK ideographs (U+4E00..U+9FA5).
# NOTE: inside a character class `|` is a literal pipe, not alternation —
# the original pattern "[a-zA-Z|\u4e00-\u9fa5]+" would wrongly accept '|'
# as part of a token. Use a raw string and drop the pipe.
count_coder = CountVectorizer(token_pattern=r"[a-zA-Z\u4e00-\u9fa5]+")

# Fit the vocabulary on the corpus and transform it to a sparse
# document-term count matrix (rows = documents, columns = vocabulary terms).
counts = count_coder.fit_transform(corpus)

# get_feature_names() was removed in scikit-learn 1.2;
# get_feature_names_out() is the supported replacement.
print(count_coder.get_feature_names_out())
print(counts.toarray())

